######################################################################
####State Capacity, Insurgency, and Civil War - Forecasting Onset ####
######################################################################
library(MASS) 
library(pscl)
library(foreign)
library(Hmisc)
library(rgdal)
library(RArcInfo)
library(stargazer)
library(mvtnorm)
library(Zelig)
library(readstata13)
library(pROC)
library(robust)
library(cvTools)
library(boot)
library(maps)
library(mapdata)
library(doBy)
library(raster)
library(survival)
library(spduration)
library(DAMisc)
library(plyr)
library(DataCombine)
library(plm)
library(AER)
library(lmtest)

########################
###Create The Dataset###
########################
###Importing the main data
#set working directory to location of dta file
setwd("~/Google Drive/NSLC/Final Dofiles/")
#Import the data
main.data <- read.dta("ntlgrid.dta")

##########################
###CW Onset Replication###
##########################
###Model 1: Civil War
#Subset data without NAs
log.1.dat <- na.omit(main.data[,c("onset", "second", "year", "ccode", "lnNL_sum", "lagcivconflagtemp", "loglagppp", "loglagpop", "mnt1", "logbdist1", "logross_oil_prod", "nwstate", "instability", "lagp_polity2", "logcellarea", "logttime", "gid", "ccode", "year93", "year94", "year95", "year96", "year97", "year98", "year99", "year00", "year01", "year02", "year03", "year04", "year05", "year06", "year07", "year08")])
#Create country code dummies manually
fccode <- factor(log.1.dat$ccode)
mydummies = model.matrix(~ fccode)[, -1]
mydummies <- data.frame(mydummies)
log.1.dat.s <- cbind(log.1.dat, mydummies)
#Subset data for analysis
log.1.dat.s2 <- subset(log.1.dat.s, year<=2006)
#Substate data for forecasting
log.1.dat.s2.pred <- subset(log.1.dat.s, year==2007|year==2008)
#Run the model with NL
log.FL.1.o.pred <- glm(onset ~ lnNL_sum + lagcivconflagtemp + loglagppp + loglagpop + mnt1 + logbdist1 +  
                      logross_oil_prod + nwstate + instability + lagp_polity2 + 
                      logcellarea + logttime + cluster(gid) + 
                      year93 + year94 + year95 + year96 + year97 +
                      year98 + year99 + year00 + year01 + year02 + year03 + year04 +year05 +
                      year06 + year07 + fccode20 +fccode41+fccode42+fccode70+fccode90+fccode91+
                      fccode92+fccode93+fccode94+fccode95+fccode100+fccode101+fccode110+fccode130+fccode135+
                      fccode140+fccode145+fccode150+fccode155+fccode160+fccode165+fccode200+fccode205+
                      fccode210+fccode211+fccode230+fccode235+fccode290+fccode305+fccode310+fccode316+
                      fccode317+fccode325+fccode339+fccode343+fccode344+fccode346+fccode350+fccode355+fccode359+
                      fccode360+fccode365+fccode366+fccode367+fccode368+fccode369+fccode370+fccode371+fccode372+
                      fccode373+fccode375+fccode380+fccode385+fccode390+fccode404+fccode420+fccode432+fccode433+
                      fccode434+fccode435+fccode436+fccode437+fccode438+fccode439+fccode450+fccode451+fccode452+
                      fccode461+fccode471+fccode475+fccode481+fccode482+fccode483+fccode484+fccode490+fccode500+
                      fccode501+fccode510+fccode516+fccode517+fccode520+fccode522+fccode530+fccode531+fccode540+
                      fccode541+fccode551+fccode552+fccode553+fccode560+fccode565+fccode570+fccode571+
                      fccode572+fccode600+fccode615+fccode616+fccode620+fccode625+fccode630+
                      fccode640+fccode645+fccode651+fccode652+fccode660 +
                      fccode663+fccode666+fccode670+fccode679+fccode690+fccode696+fccode698+
                      fccode700+fccode701+fccode702+fccode703+fccode704+fccode705+fccode710+
                      fccode712+fccode731+fccode732+fccode750+fccode760+fccode770+fccode771+
                      fccode775+fccode790+fccode800+fccode811+fccode812+fccode816+fccode820+
                      fccode850+fccode910,
                    data=log.1.dat.s2, family = "binomial")
AIC(log.FL.1.o.pred)
#Predict
pred.1.nl <- predict(log.FL.1.o.pred, newdata=log.1.dat.s2.pred)

#Run without nighttime light
log.FL.1.o.pred.no.nl  <- glm(onset ~ lagcivconflagtemp + loglagppp + loglagpop + mnt1 + logbdist1 +  
                                logross_oil_prod + nwstate + instability + lagp_polity2 + 
                                logcellarea + logttime + cluster(gid) + 
                                year93 + year94 + year95 + year96 + year97 +
                                year98 + year99 + year00 + year01 + year02 + year03 + year04 +year05 +
                                year06 + year07 + fccode20 + fccode41+fccode42+fccode70+fccode90+fccode91+
                                fccode92+fccode93+fccode94+fccode95+fccode100+fccode101+fccode110+fccode130+fccode135+
                                fccode140+fccode145+fccode150+fccode155+fccode160+fccode165+fccode200+fccode205+
                                fccode210+fccode211+fccode230+fccode235+fccode290+fccode305+fccode310+fccode316+
                                fccode317+fccode325+fccode339+fccode343+fccode344+fccode346+fccode350+fccode355+fccode359+
                                fccode360+fccode365+fccode366+fccode367+fccode368+fccode369+fccode370+fccode371+fccode372+
                                fccode373+fccode375+fccode380+fccode385+fccode390+fccode404+fccode420+fccode432+fccode433+
                                fccode434+fccode435+fccode436+fccode437+fccode438+fccode439+fccode450+fccode451+fccode452+
                                fccode461+fccode471+fccode475+fccode481+fccode482+fccode483+fccode484+fccode490+fccode500+
                                fccode501+fccode510+fccode516+fccode517+fccode520+fccode522+fccode530+fccode531+fccode540+
                                fccode541+fccode551+fccode552+fccode553+fccode560+fccode565+fccode570+fccode571+
                                fccode572+fccode600+fccode615+fccode616+fccode620+fccode625+fccode630+
                                fccode640+fccode645+fccode651+fccode652+fccode660 +
                                fccode663+fccode666+fccode670+fccode679+fccode690+fccode696+fccode698+
                                fccode700+fccode701+fccode702+fccode703+fccode704+fccode705+fccode710+
                                fccode712+fccode731+fccode732+fccode750+fccode760+fccode770+fccode771+
                                fccode775+fccode790+fccode800+fccode811+fccode812+fccode816+fccode820+
                                fccode850+fccode910,
                              data=log.1.dat.s2, family = "binomial")
AIC(log.FL.1.o.pred.no.nl)
#Predict
pred.1.no.nl <- predict(log.FL.1.o.pred.no.nl, newdata=log.1.dat.s2.pred)

#Create ROC for each prediction
roc.1.nl <-  roc(log.1.dat.s2.pred$onset, pred.1.nl)
roc.1.no.nl <-  roc(log.1.dat.s2.pred$onset, pred.1.no.nl)

#Compare the models
roc.test(roc.1.nl, roc.1.no.nl)

###Plot ROC for Model 1
ci.roc.1.nl <- ci.thresholds(roc.1.nl, conf.level=0.95)
pdf("ROCpred1.pdf")
roc.1.plot <- plot.roc(log.1.dat.s2.pred$onset, pred.1.nl,
                       main="RoC curve, Model 1 out-of-sample forecasts, 2007-2008",  xlab="False onset positives",ylab="True onset positives",
                       print.auc=TRUE, print.auc.x=0.5, print.auc.y=0.1, ci=TRUE)
plot(ci.roc.1.nl, length=.01*ifelse(attr(ci.roc.1.nl,"roc")$percent, 100, 1), col=par("fg"))
dev.off()

####Model 2: Ethnic War
#subset the required data for ethnic war
log.1.dat.s.eth <- log.1.dat.s[ which(log.1.dat.s$second > 0.04999),]
#Subset data for analysis
log.1.dat.s2.eth <- subset(log.1.dat.s.eth, year<=2006)
#Subset data for forecasting
log.1.dat.s2.eth.pred <- subset(log.1.dat.s.eth, year==2007|year==2008)
#Run the model
log.FL.2.o.pred <- glm(onset ~ lnNL_sum + lagcivconflagtemp + loglagppp + loglagpop + mnt1 + logbdist1 +  
                      logross_oil_prod + nwstate + instability + lagp_polity2 + 
                      logcellarea + logttime + cluster(gid) +
                      year93 + year94 + year95 + year96 + year97 +
                      year98 + year99 + year00 + year01 + year02 + year03 + year04 +year05 +
                      year06 + year07 + fccode20 + fccode41+fccode42+fccode70+fccode90+fccode91+
                      fccode92+fccode93+fccode94+fccode95+fccode100+fccode101+fccode110+fccode130+fccode135+
                      fccode140+fccode145+fccode150+fccode155+fccode160+fccode165+fccode200+fccode205+
                      fccode210+fccode211+fccode230+fccode235+fccode290+fccode305+fccode310+fccode316+
                      fccode317+fccode325+fccode339+fccode343+fccode344+fccode346+fccode350+fccode355+fccode359+
                      fccode360+fccode365+fccode366+fccode367+fccode368+fccode369+fccode370+fccode371+fccode372+
                      fccode373+fccode375+fccode380+fccode385+fccode390+fccode404+fccode420+fccode432+fccode433+
                      fccode434+fccode435+fccode436+fccode437+fccode438+fccode439+fccode450+fccode451+fccode452+
                      fccode461+fccode471+fccode475+fccode481+fccode482+fccode483+fccode484+fccode490+fccode500+
                      fccode501+fccode510+fccode516+fccode517+fccode520+fccode522+fccode530+fccode531+fccode540+
                      fccode541+fccode551+fccode552+fccode553+fccode560+fccode565+fccode570+fccode571+
                      fccode572+fccode600+fccode615+fccode616+fccode620+fccode625+fccode630+
                      fccode640+fccode645+fccode651+fccode652+fccode660 +
                      fccode663+fccode666+fccode670+fccode679+fccode690+fccode696+fccode698+
                      fccode700+fccode701+fccode702+fccode703+fccode704+fccode705+fccode710+
                      fccode712+fccode731+fccode732+fccode750+fccode760+fccode770+fccode771+
                      fccode775+fccode790+fccode800+fccode811+fccode812+fccode816+fccode820+
                      fccode850+fccode910,
                    data=log.1.dat.s2.eth, family = "binomial")
AIC(log.FL.2.o.pred)
#Predict
pred.2.nl <- predict(log.FL.2.o.pred, newdata=log.1.dat.s2.eth.pred)

#Run without nighttime light
log.FL.2.o.pred.no.nl <- glm(onset ~ lagcivconflagtemp + loglagppp + loglagpop + mnt1 + logbdist1 +  
                               logross_oil_prod + nwstate + instability + lagp_polity2 + 
                               logcellarea + logttime + cluster(gid) +
                               year93 + year94 + year95 + year96 + year97 +
                               year98 + year99 + year00 + year01 + year02 + year03 + year04 +year05 +
                               year06 + year07 + fccode20 + fccode41+fccode42+fccode70+fccode90+fccode91+
                               fccode92+fccode93+fccode94+fccode95+fccode100+fccode101+fccode110+fccode130+fccode135+
                               fccode140+fccode145+fccode150+fccode155+fccode160+fccode165+fccode200+fccode205+
                               fccode210+fccode211+fccode230+fccode235+fccode290+fccode305+fccode310+fccode316+
                               fccode317+fccode325+fccode339+fccode343+fccode344+fccode346+fccode350+fccode355+fccode359+
                               fccode360+fccode365+fccode366+fccode367+fccode368+fccode369+fccode370+fccode371+fccode372+
                               fccode373+fccode375+fccode380+fccode385+fccode390+fccode404+fccode420+fccode432+fccode433+
                               fccode434+fccode435+fccode436+fccode437+fccode438+fccode439+fccode450+fccode451+fccode452+
                               fccode461+fccode471+fccode475+fccode481+fccode482+fccode483+fccode484+fccode490+fccode500+
                               fccode501+fccode510+fccode516+fccode517+fccode520+fccode522+fccode530+fccode531+fccode540+
                               fccode541+fccode551+fccode552+fccode553+fccode560+fccode565+fccode570+fccode571+
                               fccode572+fccode600+fccode615+fccode616+fccode620+fccode625+fccode630+
                               fccode640+fccode645+fccode651+fccode652+fccode660 +
                               fccode663+fccode666+fccode670+fccode679+fccode690+fccode696+fccode698+
                               fccode700+fccode701+fccode702+fccode703+fccode704+fccode705+fccode710+
                               fccode712+fccode731+fccode732+fccode750+fccode760+fccode770+fccode771+
                               fccode775+fccode790+fccode800+fccode811+fccode812+fccode816+fccode820+
                               fccode850+fccode910,
                             data=log.1.dat.s2.eth, family = "binomial")
AIC(log.FL.2.o.pred.no.nl)
#Predict
pred.2.no.nl <- predict(log.FL.2.o.pred.no.nl, newdata=log.1.dat.s2.eth.pred)

#Create ROC for each prediction
roc.2.nl <-  roc(log.1.dat.s2.eth.pred$onset, pred.2.nl)
roc.2.no.nl <-  roc(log.1.dat.s2.eth.pred$onset, pred.2.no.nl)
#Compare the models
roc.test(roc.2.nl, roc.2.no.nl)

###Plot ROC for Model 2
ci.roc.2.nl <- ci.thresholds(roc.2.nl, conf.level=0.95)
pdf("ROCpred2.pdf")
roc.2.plot <- plot.roc(log.1.dat.s2.eth.pred$onset, pred.2.nl,
                       main="RoC curve, Model 2 out-of-sample forecasts, 2007-2008",  xlab="False onset positives",ylab="True onset positives",
                       print.auc=TRUE, print.auc.x=0.5, print.auc.y=0.1, ci=TRUE)
plot(ci.roc.2.nl, length=.01*ifelse(attr(ci.roc.2.nl,"roc")$percent, 100, 1), col=par("fg"))
dev.off()


####Model 3: Civil War
#Subset data without NAs
log.2.dat <- na.omit(main.data[,c("onset", "year", "ccode", "lnNL_sum", "lagcivconflagtemp", "loglagppp", "loglagpop", "mnt1", "logbdist1", "logross_oil_prod", "nwstate", "instability", "laganocracy", "lagpolbin", "logcellarea", "logttime", "gid", "ccode", "year93", "year94", "year95", "year96", "year97", "year98", "year99", "year00", "year01", "year02", "year03", "year04", "year05", "year06", "year07", "year08")])
#Create dummies for each country manually
fccode <- factor(log.2.dat$ccode)
mydummies = model.matrix(~ fccode)[, -1]
mydummies <- data.frame(mydummies)
log.2.dat.s <- cbind(log.2.dat, mydummies)
#Subset data for analysis
log.2.dat.s2 <- subset(log.2.dat.s, year<=2006)
#Subset data for forecasting
log.2.dat.s2.pred <- subset(log.2.dat.s, year==2007|year==2008)

#The Model
log.FL.3.o.pred <- glm(onset ~ lnNL_sum + lagcivconflagtemp + loglagppp + loglagpop + mnt1 + logbdist1 +  
                       logross_oil_prod + nwstate + instability + 
                       laganocracy + lagpolbin + logcellarea + logttime + cluster(gid)  + 
                       year93 + year94 + year95 + year96 + year97 +
                       year98 + year99 + year00 + year01 + year02 + year03 + year04 +year05 +
                       year06 + year07 + fccode20 + fccode41+fccode42+fccode70+fccode90+fccode91+
                       fccode92+fccode93+fccode94+fccode95+fccode100+fccode101+fccode110+fccode130+fccode135+
                       fccode140+fccode145+fccode150+fccode155+fccode160+fccode165+fccode200+fccode205+
                       fccode210+fccode211+fccode230+fccode235+fccode290+fccode305+fccode310+fccode316+
                       fccode317+fccode325+fccode339+fccode343+fccode344+fccode346+fccode350+fccode355+fccode359+
                       fccode360+fccode365+fccode366+fccode367+fccode368+fccode369+fccode370+fccode371+fccode372+
                       fccode373+fccode375+fccode380+fccode385+fccode390+fccode404+fccode420+fccode432+fccode433+
                       fccode434+fccode435+fccode436+fccode437+fccode438+fccode439+fccode450+fccode451+fccode452+
                       fccode461+fccode471+fccode475+fccode481+fccode482+fccode483+fccode484+fccode490+fccode500+
                       fccode501+fccode510+fccode516+fccode517+fccode520+fccode522+fccode530+fccode531+fccode540+
                       fccode541+fccode551+fccode552+fccode553+fccode560+fccode565+fccode570+fccode571+
                       fccode572+fccode600+fccode615+fccode616+fccode620+fccode625+fccode630+
                       fccode640+fccode645+fccode651+fccode652+fccode660 +
                       fccode663+fccode666+fccode670+fccode679+fccode690+fccode696+fccode698+
                       fccode700+fccode701+fccode702+fccode703+fccode704+fccode705+fccode710+
                       fccode712+fccode731+fccode732+fccode750+fccode760+fccode770+fccode771+
                       fccode775+fccode790+fccode800+fccode811+fccode812+fccode816+fccode820+
                       fccode850+fccode910,
                     data=log.2.dat.s2, family = "binomial")
AIC(log.FL.3.o.pred)
#Predict
pred.3.nl <- predict(log.FL.3.o.pred, newdata=log.2.dat.s2.pred)

#Run without nighttime light
log.FL.3.o.pred.no.nl <- glm(onset ~ lagcivconflagtemp + loglagppp + loglagpop + mnt1 + logbdist1 +  
                               logross_oil_prod + nwstate + instability + 
                               laganocracy + lagpolbin + logcellarea + logttime + cluster(gid)  + 
                               year93 + year94 + year95 + year96 + year97 +
                               year98 + year99 + year00 + year01 + year02 + year03 + year04 +year05 +
                               year06 + year07 + fccode20 + fccode41+fccode42+fccode70+fccode90+fccode91+
                               fccode92+fccode93+fccode94+fccode95+fccode100+fccode101+fccode110+fccode130+fccode135+
                               fccode140+fccode145+fccode150+fccode155+fccode160+fccode165+fccode200+fccode205+
                               fccode210+fccode211+fccode230+fccode235+fccode290+fccode305+fccode310+fccode316+
                               fccode317+fccode325+fccode339+fccode343+fccode344+fccode346+fccode350+fccode355+fccode359+
                               fccode360+fccode365+fccode366+fccode367+fccode368+fccode369+fccode370+fccode371+fccode372+
                               fccode373+fccode375+fccode380+fccode385+fccode390+fccode404+fccode420+fccode432+fccode433+
                               fccode434+fccode435+fccode436+fccode437+fccode438+fccode439+fccode450+fccode451+fccode452+
                               fccode461+fccode471+fccode475+fccode481+fccode482+fccode483+fccode484+fccode490+fccode500+
                               fccode501+fccode510+fccode516+fccode517+fccode520+fccode522+fccode530+fccode531+fccode540+
                               fccode541+fccode551+fccode552+fccode553+fccode560+fccode565+fccode570+fccode571+
                               fccode572+fccode600+fccode615+fccode616+fccode620+fccode625+fccode630+
                               fccode640+fccode645+fccode651+fccode652+fccode660 +
                               fccode663+fccode666+fccode670+fccode679+fccode690+fccode696+fccode698+
                               fccode700+fccode701+fccode702+fccode703+fccode704+fccode705+fccode710+
                               fccode712+fccode731+fccode732+fccode750+fccode760+fccode770+fccode771+
                               fccode775+fccode790+fccode800+fccode811+fccode812+fccode816+fccode820+
                               fccode850+fccode910,
                             data=log.2.dat.s2, family = "binomial")
AIC(log.FL.3.o.pred.no.nl)
#Predict
pred.3.no.nl <- predict(log.FL.3.o.pred.no.nl, newdata=log.2.dat.s2.pred)

#Create ROC for each prediction
roc.3.nl <-  roc(log.2.dat.s2.pred$onset, pred.3.nl)
roc.3.no.nl <-  roc(log.2.dat.s2.pred$onset, pred.3.no.nl)

#Compare the models
roc.test(roc.3.nl, roc.3.no.nl)

###Plot ROC for Model 3
ci.roc.3.nl <- ci.thresholds(roc.3.nl, conf.level=0.95)
pdf("ROCpred3.pdf")
roc.3.plot <- plot.roc(log.2.dat.s2.pred$onset, pred.3.nl,
                       main="RoC curve, Model 3 out-of-sample forecasts, 2007-2008",  xlab="False onset positives",ylab="True onset positives",
                       print.auc=TRUE, print.auc.x=0.5, print.auc.y=0.1, ci=TRUE)
plot(ci.roc.3.nl, length=.01*ifelse(attr(ci.roc.3.nl,"roc")$percent, 100, 1), col=par("fg"))
dev.off()


#Export to LaTex
stargazer(log.FL.1.o.pred, log.FL.1.o.pred.no.nl, log.FL.2.o.pred, log.FL.2.o.pred.no.nl, log.FL.3.o.pred, log.FL.3.o.pred.no.nl)
